// This file is part of AceWiki.
// Copyright 2013, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.
package ch.uzh.ifi.attempto.acewiki.gf;
import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLFSSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringEscapeUtils;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.SWRLRule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import ch.uzh.ifi.attempto.acewiki.core.Declaration;
import ch.uzh.ifi.attempto.acewiki.core.MultilingualSentence;
import ch.uzh.ifi.attempto.acewiki.core.OntologyElement;
import ch.uzh.ifi.attempto.acewiki.core.PrettyTextElement;
import ch.uzh.ifi.attempto.acewiki.core.SentenceDetail;
import ch.uzh.ifi.attempto.acewiki.owl.AceWikiOWLReasoner2;
import ch.uzh.ifi.attempto.acewiki.owl.OWLSentence;
import ch.uzh.ifi.attempto.ape.ACEParserResult;
import ch.uzh.ifi.attempto.ape.ACEText;
import ch.uzh.ifi.attempto.base.DefaultTextOperator;
import ch.uzh.ifi.attempto.base.MultiTextContainer;
import ch.uzh.ifi.attempto.base.TextContainer;
import ch.uzh.ifi.attempto.base.TextElement;
import ch.uzh.ifi.attempto.base.TextOperator;
import ch.uzh.ifi.attempto.echocomp.LocaleResources;
import ch.uzh.ifi.attempto.gfservice.GfServiceException;
/**
* This class represents a sentence for the GF AceWiki engine.
* A "sentence" is a tree set that can be linearized into multiple
* languages.
*
* @author Kaarel Kaljurand
*/
public abstract class GfSentence extends MultilingualSentence implements OWLSentence {

	// SLF4J loggers are thread-safe; one static instance per class is the convention.
	private static final Logger mLogger = LoggerFactory.getLogger(GfSentence.class);

	protected final GfGrammar mGfGrammar;
	protected final GfWikiEntry mGfWikiEntry;

	// Use the original input in the text container
	// TODO: this is a hack
	boolean mUseOriginal = true;

	// Maps a language identifier to the set of linearizations (text containers) in this language
	final Map<String, MultiTextContainer> textContainers = new HashMap<>();

	// Maps a tree to the set of linearizations for each language.
	// ie. Map<Tree, Map<Language, Set<Linearization>>>
	// Lazy initialized as tree linearizations are requested, but done for all languages at once - performance reasons.
	private final Map<String, Map<String, Set<String>>> treeLinearizations = new HashMap<>();

	// These fields are evaluated lazily by update()
	private Boolean isReasonable;
	private Boolean isOWL;
	private Boolean isOWLSWRL;
	private Set<OWLAxiom> owlAxioms;

	/**
	 * Creates a declaration from an existing wiki entry (from the storage).
	 * Each wiki entry is tagged with the language in which the entry was created.
	 * Each wiki entry also contains the original text that was used to create the
	 * trees. In case the trees can not be linearized (because the grammar has changed)
	 * then we try to parse the original text instead (which might also fail).
	 *
	 * @param grammar the wiki's grammar, used to linearize/reparse the entry
	 * @param entry the stored wiki entry; its language and text may be null
	 */
	public GfSentence(GfGrammar grammar, GfWikiEntry entry) {
		mGfGrammar = grammar;
		// The entry can only be reparsed if both the language and the original text are known.
		boolean isParseable = (entry.getLanguage() != null && entry.getText() != null);
		if (isParseable && hasBadTrees(entry.getLanguage(), entry.getTrees())) {
			// At least one stored tree no longer linearizes in the entry's language
			// (the grammar may have changed): fall back to reparsing the original text.
			mGfWikiEntry = createGfWikiEntry(grammar, entry.getLanguage(), entry.getText());
		} else {
			mGfWikiEntry = entry;
		}
	}

	/**
	 * Creates a declaration object from the given text.
	 * The text will be parsed to get the trees.
	 *
	 * @param grammar the wiki's grammar, used to parse the text
	 * @param language the concrete language the text is written in
	 * @param text the sentence text to parse
	 */
	public GfSentence(GfGrammar grammar, String language, String text) {
		mGfGrammar = grammar;
		mGfWikiEntry = createGfWikiEntry(grammar, language, text);
	}

	/**
	 * Factory that decides whether the entry is a question or a declaration.
	 * An entry whose text contains a question mark is treated as a question.
	 *
	 * @param grammar the wiki's grammar
	 * @param entry the wiki entry to wrap
	 * @return a {@link GfQuestion} or a {@link GfDeclaration}
	 */
	public static GfSentence createGfSentence(GfGrammar grammar, GfWikiEntry entry) {
		// The entry text can be null (the constructor above explicitly allows it),
		// so guard against an NPE and fall back to a declaration in that case.
		String text = entry.getText();
		if (text != null && text.contains("?")) {
			return new GfQuestion(grammar, entry);
		}
		return new GfDeclaration(grammar, entry);
	}

	/**
	 * Maps this declaration to its visual representation in the given language.
	 * The declaration is a set of trees.
	 * Each tree can in principle have multiple linearizations (variants), but we currently
	 * consider only the first (canonical) variant.
	 * We have to handle the following:
	 * - the linearization fails (e.g. tree is malformed)
	 * - a tree has an empty set of linearizations
	 * - a linearization is an empty string
	 * - a linearization repeats
	 * - the user wants to see what he/she originally entered not a rewrite into the canonical variant
	 *
	 * @param language the language to linearize into
	 * @return the (cached) multi-text container for the given language; never null
	 */
	public MultiTextContainer getTextContainer(String language) {
		MultiTextContainer mtc = textContainers.get(language);
		if (mtc == null) {
			List<TextContainer> tmp = new ArrayList<>();
			TextOperator to = getTextOperator(language);
			// If the text is requested in the original language (i.e. in which the entry was first created)
			// then we return the original text. The benefit is that we do not need to make a call
			// to the linearizer. Also, there is no danger that the original text would be replaced by a variant
			// (e.g. "does not -> doesn't") which would be confusing.
			if (mUseOriginal && language.equals(mGfWikiEntry.getLanguage()) && mGfWikiEntry.getText() != null) {
				// Note: this ImmutableList is never mutated below — it always has one
				// element, so the tmp.isEmpty() fallback branch is not reached.
				tmp = ImmutableList.of(makeTextContainer(to, mGfWikiEntry.getText()));
			} else {
				Set<String> seen = Sets.newHashSet();
				for (String tree : mGfWikiEntry.getTrees().getTrees()) {
					Set<String> lins = getLins(tree, language);
					if (lins == null) {
						mLogger.info("getTextContainer: null {}: {}", language, tree);
						// TODO do it properly
						tmp.add(new TextContainer(new TextElement("-NULL-" + tree)));
					} else if (lins.isEmpty()) {
						mLogger.info("getTextContainer: empty {}: {}", language, tree);
						// TODO do it properly
						tmp.add(new TextContainer(new TextElement("-EMPTY-" + tree)));
					} else {
						// Only the first (canonical) variant is considered.
						String lin = lins.iterator().next();
						// Don't show an empty lin and don't show the same lin twice;
						// Set.add returns false if the lin was already present.
						if (!lin.isEmpty() && seen.add(lin)) {
							tmp.add(makeTextContainer(to, lin));
						}
					}
				}
			}
			if (tmp.isEmpty()) {
				tmp.add(new TextContainer(new TextElement("-NO_LINEARIZATION_FOUND-")));
			}
			mtc = new MultiTextContainer(tmp);
			textContainers.put(language, mtc);
		}
		return mtc;
	}

	/**
	 * Drops all cached linearizations so they are recomputed on the next request.
	 *
	 * TODO: this is temporary. Also note that it does not have effect on the
	 * original input as this is always taken from mGfWikiEntry.getText()
	 */
	public void clearLinearizations() {
		textContainers.clear();
		treeLinearizations.clear();
		mUseOriginal = false;
	}

	/**
	 * Tokenizes the given string with the given text operator and wraps each
	 * token as a {@link PrettyTextElement} in a new text container.
	 */
	TextContainer makeTextContainer(TextOperator to, String str) {
		TextContainer tc = new TextContainer(to);
		for (String s : to.splitIntoTokens(str)) {
			tc.addElement(new PrettyTextElement(s));
		}
		return tc;
	}

	/**
	 * TODO: not implemented yet — always reports that the element is not contained.
	 */
	public boolean contains(OntologyElement e) {
		return false;
	}

	/**
	 * Returns the details of this tree set:
	 *
	 * - abstract trees;
	 * - translations;
	 * - abstract tree diagram;
	 * - parse tree diagram;
	 * - word alignment diagram;
	 * - ...
	 *
	 * The output highlights the given language.
	 *
	 * TODO: everything should be hyperlinked.
	 *
	 * @param lang the language to highlight
	 * @param index index of the tree within this tree set
	 * @return the list of sentence details (semantics first, if available)
	 */
	public List<SentenceDetail> getDetails(String lang, int index) {
		List<SentenceDetail> l = new ArrayList<>();
		if (mGfGrammar.isAceCompatible()) {
			l.addAll(getSemantics(index));
		}
		l.addAll(formatTree(mGfGrammar, lang, index));
		return l;
	}

	/**
	 * Returns the number of trees in this tree set.
	 */
	public int getNumberOfRepresentations() {
		return mGfWikiEntry.getTrees().size();
	}

	/**
	 * Returns the abstract trees of this sentence as strings.
	 */
	public List<String> getParseTrees() {
		return mGfWikiEntry.getTrees().getTrees();
	}

	/**
	 * Returns the underlying wiki entry (language, original text, and trees).
	 */
	public GfWikiEntry getGfWikiEntry() {
		return mGfWikiEntry;
	}

	/**
	 * Serializes the underlying wiki entry into its storage representation.
	 */
	public String serialize() {
		return GfGrammar.serialize(mGfWikiEntry);
	}

	/**
	 * (Re)computes the lazily evaluated OWL-related state of this sentence:
	 * {@code isOWL}, {@code isOWLSWRL}, {@code isReasonable} and the OWL axioms.
	 * A sentence that is not reasonable is also retracted from the ontology.
	 */
	public void update() {
		Set<Set<OWLAxiom>> setOfSetOfAxiom = null;
		String uri = getOntology().getURI();
		try {
			setOfSetOfAxiom = GfOwlConverter.convert(mGfGrammar, uri, mGfWikiEntry);
		} catch (OWLOntologyCreationException e1) {
			// TODO handle this properly. For now log the failure (instead of
			// swallowing it silently) and treat the entry as non-OWL below.
			mLogger.warn("OWL conversion failed: {}", e1.getMessage());
		}
		if (setOfSetOfAxiom == null || setOfSetOfAxiom.isEmpty()) {
			isOWLSWRL = isOWL = isReasonable = false;
			owlAxioms = new HashSet<>();
		} else {
			isOWLSWRL = isOWL = isReasonable = true;
			owlAxioms = GfOwlConverter.disambiguate(setOfSetOfAxiom);
			// TODO: currently not reasoning with SWRL rules, this should
			// be controlled by the profile instead
			for (OWLAxiom ax : owlAxioms) {
				if (ax instanceof SWRLRule) {
					isOWL = isReasonable = false;
					mLogger.info("Axiom is SWRL rule: {}", ax);
					break;
				}
			}
		}
		// TODO: check also questions somehow, e.g. EL probably does not allow inverse properties in questions
		if (isReasonable && this instanceof Declaration) {
			AceWikiOWLReasoner2 reasoner = (AceWikiOWLReasoner2) getOntology().getReasoner().getWrappedReasoner();
			isReasonable = GfOwlConverter.isReasonable(reasoner, owlAxioms);
		}
		if (!isReasonable && isIntegrated()) {
			super.setIntegrated(false);
		}
	}

	// TODO: this method does not make sense for GF-wiki entries
	// because they can be ambiguous.
	public String getPrettyOWL() {
		return null;
	}

	/**
	 * Returns true if this sentence can be used for reasoning.
	 * Evaluated lazily via {@link #update()}.
	 */
	public boolean isReasonable() {
		if (isReasonable == null) {
			update();
		}
		return isReasonable;
	}

	/**
	 * Returns true if this sentence has an OWL representation.
	 * Evaluated lazily via {@link #update()}.
	 */
	public boolean isOWL() {
		if (isOWL == null) {
			update();
		}
		return isOWL;
	}

	/**
	 * Returns true if this sentence has an OWL/SWRL representation.
	 * Evaluated lazily via {@link #update()}.
	 */
	public boolean isOWLSWRL() {
		if (isOWLSWRL == null) {
			update();
		}
		return isOWLSWRL;
	}

	/**
	 * Returns the OWL axioms of this sentence (empty if it has no OWL representation).
	 * Evaluated lazily via {@link #update()}.
	 */
	public Set<OWLAxiom> getOWLAxioms() {
		if (owlAxioms == null) {
			update();
		}
		return owlAxioms;
	}

	/**
	 * Returns the grammar object.
	 *
	 * @return The grammar object.
	 */
	protected GfGrammar getGfGrammar() {
		return mGfGrammar;
	}

	// Return some of the APE analysis of the tree at the given index.
	// The APE analysis is obtained by first linearizing the tree in "Ape".
	// This only works if the wiki is ACE-based.
	//
	// TODO: experimental
	private List<SentenceDetail> getSemantics(int index) {
		String tree = mGfWikiEntry.getTrees().getTrees().get(index);
		List<SentenceDetail> l = new ArrayList<>();
		if (tree == null) {
			l.add(new SentenceDetail("ERROR", "Statement is not well-formed"));
			return l;
		}
		try {
			ACEText acetext = GfWikiUtils.getACEText(mGfGrammar, tree);
			ACEParserResult pr = GfWikiUtils.parse(acetext, getOntology().getURI(), PARAPHRASE1, OWLFSSPP, DRSPP);
			l.add(new SentenceDetail("ACE", "<pre>" + StringEscapeUtils.escapeHtml(acetext.getText()) + "</pre>"));
			l.add(new SentenceDetail("ACE (paraphrase)", "<pre>" + StringEscapeUtils.escapeHtml(pr.get(PARAPHRASE1)) + "</pre>"));
			l.add(new SentenceDetail("OWL", "<pre>" + StringEscapeUtils.escapeHtml(pr.get(OWLFSSPP)) + "</pre>"));
			l.add(new SentenceDetail("DRS", "<pre>" + StringEscapeUtils.escapeHtml(pr.get(DRSPP)) + "</pre>"));
			l.add(new SentenceDetail("Lexicon", "<pre>" + StringEscapeUtils.escapeHtml(Joiner.on('\n').join(acetext.getLexicon().getEntries())) + "</pre>"));
			l.add(new SentenceDetail("Messages",
					"<pre>" + StringEscapeUtils.escapeHtml(Joiner.on('\n').join(pr.getMessageContainer().getMessages())) + "</pre>"));
		} catch (Exception e) {
			l.add(new SentenceDetail("ERROR", e.getMessage()));
		}
		return l;
	}

	// Renders the abstract-tree diagram of the given tree as an HTML <img> link,
	// or an error paragraph if the GF service call fails.
	private String getAbstrtreeAsHtml(String tree) {
		try {
			return getImg(getGfGrammar().abstrtree(tree));
		} catch (GfServiceException e) {
			return getError(e);
		}
	}

	// Renders the parse-tree diagram of the given tree (in the given language)
	// as an HTML <img> link, or an error paragraph if the GF service call fails.
	private String getParsetreeAsHtml(String tree, String language) {
		try {
			return getImg(getGfGrammar().parsetree(tree, language));
		} catch (GfServiceException e) {
			return getError(e);
		}
	}

	// Wraps a data URI as a clickable, height-limited image.
	private String getImg(String dataUri) {
		return "<a href=\"" + dataUri + "\"><img src=\"" + dataUri + "\" style=\"max-height:500px\"/></a>";
	}

	// Formats an exception message as a red HTML paragraph.
	private static String getError(Exception e) {
		return "<p style=\"color: red\">" + e.getMessage() + "</p>";
	}

	// Builds the detail rows (parse tree diagram, ASCII tree, abstract tree diagram)
	// for the tree at the given index.
	private List<SentenceDetail> formatTree(GfGrammar grammar, String lang, int index) {
		String tree = mGfWikiEntry.getTrees().getTrees().get(index);
		List<SentenceDetail> l = new ArrayList<>();
		l.add(new SentenceDetail("acewiki_details_syntree", getParsetreeAsHtml(tree, lang)));
		l.add(new SentenceDetail(
				LocaleResources.getString("acewiki_details_internal") + " (ASCII)",
				"<p><code>" + tree + "</code></p>"
				));
		l.add(new SentenceDetail("acewiki_details_internal", getAbstrtreeAsHtml(tree)));
		return l;
	}

	/**
	 * Returns the linearizations of the given tree in the given language,
	 * or null if the tree is not supported by the grammar.
	 * The linearizations (for all languages at once) are cached per tree.
	 */
	Set<String> getLins(String tree, String language) {
		// Linearization of a single tree to all possible languages.
		Map<String, Set<String>> tl = treeLinearizations.get(tree);
		if (tl == null) {
			try {
				tl = getGfGrammar().linearize(tree);
				treeLinearizations.put(tree, tl);
			} catch (GfServiceException e) {
				// TODO find out what happened, i.e.
				// why was the tree not supported by the grammar.
				mLogger.warn("tree not supported by the grammar - {}: {}", language, tree);
				return null;
			}
		}
		return tl.get(language);
	}

	// Returns true if any tree in the list fails to linearize in the given
	// language (used to detect entries broken by a grammar change).
	private boolean hasBadTrees(String language, TreeList treeList) {
		for (String tree : treeList.getTrees()) {
			Set<String> lins = getLins(tree, language);
			if (lins == null || lins.isEmpty()) {
				return true;
			}
		}
		return false;
	}

	// Parses the given text in the given language into a new wiki entry.
	// If parsing yields no trees, retries once with the first character
	// lower-cased (a common capitalization mismatch).
	private static GfWikiEntry createGfWikiEntry(GfGrammar grammar, String language, String text) {
		try {
			Set<String> trees = grammar.parse(text, language);
			if (trees == null) {
				throw new RuntimeException("No such language: " + language);
			}
			if (trees.isEmpty()) {
				// TODO this should be done properly; see GfTextOperator
				// If parsing fails: first char to lower case
				text = DefaultTextOperator.firstCharToLowerCase(text);
				trees = grammar.parse(text, language);
			}
			return new GfWikiEntry(language, text, new TreeList(trees));
		} catch (GfServiceException e) {
			// Chain the cause so the original stack trace is not lost.
			throw new RuntimeException(e.getMessage(), e);
		}
	}
}